import re
import sys

import requests
import urllib3

# Site being scraped: www.ypppt.com
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
    'cookie': 'Hm_lvt_45db753385e6d769706e10062e3d6453=1710816875; __gads=ID=48d1a9581d18d277:T=1710816876:RT=1710816876:S=ALNI_MblH8WFkmVnGbmDHUJxRJ7oci6IEQ; __gpi=UID=00000d448077103a:T=1710816876:RT=1710816876:S=ALNI_MZktneBHdK99Gg3jgpZjgOzaEh4yw; __eoi=ID=038ff346632d3217:T=1710816876:RT=1710816876:S=AA-AfjZkb_L2R1105BRoO0do9-2h; Hm_lpvt_45db753385e6d769706e10062e3d6453=1710817282',

}
# Template listing page.
url = 'https://www.ypppt.com/moban/'
# Download page; the article id is passed as the ``aid`` query parameter.
DOWNLOAD_PAGE_URL = 'https://www.ypppt.com/p/d.php'
# Seconds to wait for a response before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10

# Captures (article id, title) from each template link on the listing page.
LISTING_PATTERN = re.compile(
    r'href="/article/[^"]*?/([^"/]+)\.html" class="p-title" target="_blank">(.*?)</a>'
)
# Captures the href of an anchor whose text is "下载地址1" (download address 1).
# ``[^"]*`` keeps the capture inside a single href attribute.
DOWNLOAD_PATTERN = re.compile(r'href="([^"]*)">下载地址1</a>')


def parse_listing(html):
    """Return the ``(article_id, title)`` pairs found in listing-page HTML.

    Args:
        html: Text of the listing page.

    Returns:
        A list of ``(article_id, title)`` string tuples in page order; empty
        when the page contains no template links.
    """
    return LISTING_PATTERN.findall(html)


def extract_download_url(html):
    """Return the download link found in download-page HTML.

    Args:
        html: Text of the download page.

    Returns:
        The href of the last "下载地址1" anchor, or ``None`` when the page
        has no such anchor.
    """
    matches = DOWNLOAD_PATTERN.findall(html)
    return matches[-1] if matches else None


def _fetch(session, page_url, params=None):
    """GET ``page_url`` through ``session`` and return the response.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            4xx/5xx status code.
    """
    # SECURITY NOTE: verify=False disables TLS certificate validation. It is
    # kept because the site produced SSL errors with validation enabled.
    response = session.get(
        page_url,
        params=params,
        headers=headers,
        verify=False,
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    return response


def main():
    """Print the title and download link of every template on the listing page."""
    # Certificate checks are skipped, so silence the warning emitted per request.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    # One session reuses the underlying connection for all requests.
    with requests.Session() as session:
        listing = _fetch(session, url)
        # Chinese text came back garbled with the guessed encoding, so decode
        # the listing page as UTF-8 explicitly.
        listing.encoding = 'utf-8'

        for ppt_id, name in parse_listing(listing.text):
            try:
                detail = _fetch(session, DOWNLOAD_PAGE_URL, params={'aid': ppt_id})
            except requests.RequestException as exc:
                # One bad page should not abort the rest of the run.
                print(name, 'request failed:', exc, file=sys.stderr)
                continue

            down_url = extract_download_url(detail.text)
            if down_url is None:
                print(name, 'download link not found', file=sys.stderr)
                continue
            print(name, down_url)


if __name__ == '__main__':
    main()
# for id in pptId:
#     url = 'https://www.ypppt.com/p/d.php?aid=' + id
#     res = requests.get(url, headers=headers, verify=False)
#     # res.encoding = 'utf-8'
#     # print(res.text)
#     # break
#     #提取数据
#     down_url = re.findall('href="(.*?)">下载地址1</a>',res.text)
#     print(down_url)

# Template listing page: https://www.ypppt.com/moban/
# Display page of a single PPT: https://www.ypppt.com/article/2020/6361.html
# Download page of a single PPT: https://www.ypppt.com/p/d.php?aid=6361
# Download link: https://down.ypppt.com/uploads/soft/200725/1-200H5231057.zip
